* ---- Session setup ------------------------------------------------------
* Start from an empty session, disable output paging and close any log
* that a previous run may have left open.
clear all
set more off
log close _all

* Source encoding used when legacy .dta files are translated to Unicode
* further below.
unicode encoding set Windows-1251

* All raw inputs are read from, and all intermediate files written to,
* this folder.
cd "D:\data\MoscowMayorElections\RAW_DATA"

clear matrix
clear mata
clear


* ---- Convert the three raw text files to Stata datasets -----------------

* (1) Results of the 2013 Moscow mayoral election.
*     UIK is the ID of the precinct polling station.
insheet using "electionresults2013.txt", double clear
save "electionresults2013.dta", replace

* (2) UIKs of 2013, each with up to 5 of the closest UIKs of the 2012
*     presidential election located at the same address.
*     UIK IDs changed between 2012 and 2013, so the two years were matched
*     by address: GPS coordinates of the 2013 UIKs were collected and up to
*     5 closest 2012 UIKs at the same address were matched manually.
*     Several UIKs can share one address.
*       latitudeuik2013 longitudeuik2013 - latitude and longitude of the 2013 UIK
*       latitudeuik2012 longitudeuik2012 - latitude and longitude of the matched 2012 UIKs
*       distuik2012                      - distance between the matched 2013 and 2012 UIKs
*       id_station                       - ID of the metro station
*       distmetro                        - distance from the metro station to the 2013 UIK
*       order from metro                 - rank of the UIK by distance from the closest metro station
insheet using "uiks_2013.txt", double clear
* The raw column is called metro; rename it to the key used in later merges.
rename metro id_station
save "uiks2013.dta", replace

* (3) Metro stations with GPS coordinates. Manager=1 and Manager=2 mark
*     the stations where the newspaper was distributed.
insheet using "metroelection_2013.txt", double clear
save "metro2013.dta", replace

* ---- 2012 presidential results: five suffixed copies --------------------
* Each 2013 UIK is linked to up to 5 UIKs of 2012, so the 2012 results are
* saved five times, copy k having every variable name suffixed with k.
* This lets the copies be merged one after another without name clashes,
* and the 2012 shares are later averaged across the matches.
*
* The source file is translated, loaded, filtered and processed ONCE and
* the five copies are written from that single in-memory dataset. Doing
* the work inside the loop would repeat identical steps five times and,
* because ties in an unstable sort are ordered randomly, could keep a
* different row per copy whenever a uik appears more than once.

clear

* Convert the legacy-encoded file to Unicode (uses the encoding set above).
unicode translate "2012_pres_4.dta"

use "2012_pres_4.dta", clear

* Keep the results for Moscow only (region code 77).
keep if region==77

* One row per uik. The stable sort keeps the file order within ties, so
* the row that survives the forced drop is the same on every run.
* NOTE(review): rows that differ but share a uik are discarded silently by
* the force option - confirm that the first row is the one wanted.
sort uik, stable
duplicates drop uik, force
rename uik uik2012_

* Share of each candidate = votes for the candidate divided by the sum of
* ballots cast inside the polling station and outside it (home voting).
* percent_putin is expected to exist already in the source file, hence
* replace rather than generate.
replace percent_putin=v23_c/(v4_c+v7_c)
g percent_zhir=v19_c/(v4_c+v7_c)
g percent_zu=v20_c/(v4_c+v7_c)
g percent_mir=v21_c/(v4_c+v7_c)
g percent_proh=v22_c/(v4_c+v7_c)

forvalues s_k = 1/5 {
    * Work on a throwaway copy so the unsuffixed data survive the renaming.
    preserve

    * Append the copy number to every variable name; the merge key
    * uik2012_ becomes uik2012_1, uik2012_2, and so on.
    foreach var_i of varlist * {
        rename `var_i' `var_i'`s_k'
    }

    save "pres_elections2012_`s_k'", replace

    restore
}

* Leave memory empty: the restored data are unsaved, and a following use
* command without the clear option would otherwise refuse to load.
clear



* ---- Assemble the UIK-level dataset --------------------------------------

* Start from the 2013 UIK list and attach the 2013 results; only UIKs that
* are present in both files are kept.
use uiks2013.dta
merge 1:1 uik2013 using electionresults2013, keep(match) nogenerate

* Attach the metro station attributes (many UIKs per station); UIKs whose
* station is not in the metro file are discarded.
merge m:1 id_station using metro2013, keep(match) nogenerate

* Attach the 2012 presidential results for each of the up to 5 matched
* 2012 UIKs. Every 2013 UIK is retained whether or not the k-th match
* exists; 2012 UIKs that match nothing are not added.
forvalues s_k = 1/5 {
    merge m:1 uik2012_`s_k' using "pres_elections2012_`s_k'", keep(master match) nogenerate
}

* Restrict the sample to UIKs within 2 km of the closest metro station: it
* is highly unlikely that people living further than 2 km from the metro
* entrance use that station for transportation. UIKs with a missing
* distance are removed as well.
keep if distmetro<=2

* ---- 2013 mayoral election: shares ---------------------------------------
* Every candidate share is votes / (valid + invalid ballots).
local ballots_cast "(n_ballot_valid2013+n_ballot_invalid2013)"

foreach cand in navalny sobyanin degtyarev levichev melnikov mitrokhin {
    local votes "`cand'2013"
    * The raw vote column for Degtyarev is spelled degryarev2013.
    * NOTE(review): looks like a typo carried over from the raw file -
    * confirm against electionresults2013.txt before renaming anything.
    if "`cand'"=="degtyarev" local votes "degryarev2013"
    g percent_`cand'=`votes'/`ballots_cast'
}

* Turnout = ballots cast / registered voters; invalid share = invalid
* ballots / ballots cast.
g percent_turnout2013=`ballots_cast'/n_voters2013
g percent_invalid=n_ballot_invalid2013/`ballots_cast'

* Bring the merged 2012 turnout columns under the common percent_ prefix
* so they can be averaged together with the candidate shares.
rename turnout* percent_turnout2012*


* ---- 2012 presidential election: average over the matched UIKs ----------
* For each 2013 UIK, average every 2012 measure over the matched 2012 UIKs
* (up to 5, same address), skipping matches that are missing.

foreach measure in putin zhir zu mir proh turnout2012 {

    * Running total and number of non-missing matches.
    g sum_percent_`measure'=0
    g n_percent_`measure'=0

    forvalues match = 1/5 {
        * A missing match contributes 0 to the total and 0 to the count.
        replace sum_percent_`measure'=sum_percent_`measure'+cond(missing(percent_`measure'`match'), 0, percent_`measure'`match')
        replace n_percent_`measure'=n_percent_`measure'+!missing(percent_`measure'`match')
    }

    * With no non-missing match the ratio is 0/0, which Stata stores as
    * missing.
    g percent_`measure'=sum_percent_`measure'/n_percent_`measure'

}

* Rescale every variable whose name starts with percent_ by 100. This
* covers the per-match 2012 columns as well as the averages and the 2013
* shares.
* NOTE(review): assumes the raw 2012 turnout column is a fraction, like
* the computed shares - confirm.
foreach pct of varlist percent_* {
    replace `pct'=100*`pct'
}

* ---- Treatment indicator, final variable list and labels ----------------

* newspaper = 1 for UIKs whose metro station has manager equal to 1 or 2,
* i.e. a station where the newspaper was distributed, and 0 otherwise.
g newspaper = inlist(manager, 1, 2)

* Keep only the variables needed for the main analysis and drop everything
* else that came from the raw files.
keep order* uik2013 percent_zu percent_zhir percent_mir percent_proh  percent_putin  percent_turnout2012 percent_sobyanin percent_melnikov percent_degtyarev percent_levichev percent_mitrokhin percent_navalny percent_turnout2013 percent_invalid newspaper station id_line distmetro 

* Variable labels. Full variable names are used instead of abbreviations
* so the labels still apply when variable abbreviation is switched off or
* when another variable with the same prefix is added.
label var uik2013 "UIK - id of the polling station in 2013"
label var orderfrommetro "Order of UIK from metro station"

* 2012 presidential election
label var percent_zu "Percent for Zyuganov in 2012 presidential election"
label var percent_zhir "Percent for Zhirinovsky in 2012 presidential election"
label var percent_putin "Percent for Putin in 2012 presidential election"
label var percent_mir "Percent for Mironov in 2012 presidential election"
label var percent_proh "Percent for Prokhorov in 2012 presidential election"
label var percent_turnout2012 "Turnout in 2012 presidential election"

* 2013 Moscow mayor election
label var percent_sobyanin "Percent for Sobyanin in 2013 Moscow mayor election"
label var percent_melnikov "Percent for Melnikov in 2013 Moscow mayor election"
label var percent_degtyarev "Percent for Degtyarev in 2013 Moscow mayor election"
label var percent_levichev "Percent for Levichev in 2013 Moscow mayor election"
label var percent_mitrokhin "Percent for Mitrokhin in 2013 Moscow mayor election"
label var percent_navalny "Percent for Navalny in 2013 Moscow mayor election"
label var percent_turnout2013 "Turnout in 2013 Moscow mayor election"
label var percent_invalid "Percent invalid in 2013 Moscow mayor election"

* Treatment and geography
label var newspaper "1 if newspaper was distributed"
label var distmetro "Distance from metro to the polling station"

* ---- Clustering variable, hub flag, distance-rank bands, final save -----

* Treatment was selected within pairs of metro lines, so each pair gets an
* id that can be used to cluster errors later. Lines that belong to none of
* the pairs listed here keep the value 0.
gen metrolinepairs=0
replace metrolinepairs=1 if inlist(id_line, 15, 16)
replace metrolinepairs=2 if inlist(id_line, 1, 17)
replace metrolinepairs=3 if inlist(id_line, 2, 4)
replace metrolinepairs=4 if inlist(id_line, 5, 6)
replace metrolinepairs=5 if inlist(id_line, 8, 9)
replace metrolinepairs=6 if inlist(id_line, 10, 12)
replace metrolinepairs=7 if inlist(id_line, 13, 14)

* Transport hubs could not be included in treatment; flag them by station
* name (exact, case-sensitive match).
g station_hub = 0
foreach hub in "Schelkovskaya" "Electrozavodskaya" "VDNH" "Medvedkovo" ///
    "Savelovskaya" "Timiryazevskaya" "Petrovsko-Razumovskaya" ///
    "Altufievo" "Rechnoy Vokzal" "Tushinskaya" "Begovaya" ///
    "Kuntsevskaya" "Fili" "Yugo-Zapadnaya" ///
    "Teply Stan" "Nagatinskaya" "Tsaritsyno" "Domodedovskaya" ///
    "Tekstilschiki" "Vykhino" "Novogireevo" ///
    "Voikovskaya" "Rizhskaya" {
    replace station_hub = 1 if station=="`hub'"
}

* Bands by rank of the UIK from its metro station:
* col1 = ranks up to 5, col2 = above 5 up to 10, col3 = above 10 up to 15,
* col4 = ranks up to 15. A missing rank yields 0 in all four.
g col1=orderfrommetro<=5
g col2=(orderfrommetro>5) & (orderfrommetro<=10)
g col3=(orderfrommetro>10) & (orderfrommetro<=15)
g col4=orderfrommetro<=15

* Write the analysis file in the Stata 12 format so older releases can
* open it.
saveold data_moscow_elections_Jan2024, replace version(12)

